# setup
# Chunk defaults for the knitted document: hide messages in chunk output and
# use a 5 (width) x 3 (height) figure size.
knitr::opts_chunk$set(message=FALSE, fig.height=3, fig.width=5)
# Attach order matters: packages attached later mask same-named functions from
# packages attached earlier. car is attached after dplyr, so an unqualified
# recode() resolves to car::recode -- keep this order.
library(ggplot2) # to make pretty plots
library(dplyr) # for inner_join and left_join
library(car) # to recode
library(maps) # for the maps
library(gridExtra) # to arrange the plots in grids
library(plotly) # for interactive plots
library(cowplot) # to have get_legend
# Load one happiness table per year; the CSV files are looked up relative to
# the current working directory.
happy_2015 <- read.csv(file = "2015.csv")
happy_2016 <- read.csv(file = "2016.csv")
happy_2017 <- read.csv(file = "2017.csv")

# Print the column names and types of the 2015 table as a quick sanity check.
str(object = happy_2015)
'data.frame': 158 obs. of 12 variables:
$ Country : Factor w/ 158 levels "Afghanistan",..: 136 59 38 106 25 46 100 135 101 7 ...
$ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 6 10 10 10 1 1 ...
$ Happiness.Rank : int 1 2 3 4 5 6 7 8 9 10 ...
$ Happiness.Score : num 7.59 7.56 7.53 7.52 7.43 ...
$ Standard.Error : num 0.0341 0.0488 0.0333 0.0388 0.0355 ...
$ Economy..GDP.per.Capita. : num 1.4 1.3 1.33 1.46 1.33 ...
$ Family : num 1.35 1.4 1.36 1.33 1.32 ...
$ Health..Life.Expectancy. : num 0.941 0.948 0.875 0.885 0.906 ...
$ Freedom : num 0.666 0.629 0.649 0.67 0.633 ...
$ Trust..Government.Corruption.: num 0.42 0.141 0.484 0.365 0.33 ...
$ Generosity : num 0.297 0.436 0.341 0.347 0.458 ...
$ Dystopia.Residual : num 2.52 2.7 2.49 2.47 2.45 ...
# Give every yearly table short, year-suffixed column names so the columns stay
# distinguishable once the tables are merged. Names are assigned by position,
# so each vector must follow the column order of the corresponding CSV.
colnames(happy_2015) <- c(
  "Country", "Region", "Rank_2015", "Score_2015", "Err_2015", "GDP_2015",
  "Family_2015", "Health_2015", "Freedom_2015", "Gov_2015",
  "Generosity_2015", "Dist_res_2015"
)
colnames(happy_2016) <- c(
  "Country", "Region_2016", "Rank_2016", "Score_2016", "LCI_2016", "UCI_2016",
  "GDP_2016", "Family_2016", "Health_2016", "Freedom_2016", "Gov_2016",
  "Generosity_2016", "Dist_res_2016"
)
# Fixed: the upper whisker of the 2017 table was labelled "Whisker.high_2016"
# (copy-paste slip); it now carries the 2017 suffix like its sibling column.
colnames(happy_2017) <- c(
  "Country", "Rank_2017", "Score_2017", "Whisker.high_2017",
  "Whisker.low_2017", "GDP_2017", "Family_2017", "Health_2017",
  "Freedom_2017", "Generosity_2017", "Gov_2017", "Dist_res_2017"
)

# merge() keeps only rows whose key appears in both inputs by default, so the
# result contains just the countries present in all three years.
happy <- merge(happy_2015, happy_2016, by = "Country")
happy <- merge(happy, happy_2017, by = "Country")

# The 2015 table already supplies "Region"; drop the 2016 copy.
happy$Region_2016 <- NULL

head(happy)
Country Region Rank_2015 Score_2015
1 Afghanistan Southern Asia 153 3.575
2 Albania Central and Eastern Europe 95 4.959
3 Algeria Middle East and Northern Africa 68 5.605
4 Angola Sub-Saharan Africa 137 4.033
5 Argentina Latin America and Caribbean 30 6.574
6 Armenia Central and Eastern Europe 127 4.350
Err_2015 GDP_2015 Family_2015 Health_2015 Freedom_2015 Gov_2015
1 0.03084 0.31982 0.30285 0.30335 0.23414 0.09719
2 0.05013 0.87867 0.80434 0.81325 0.35733 0.06413
3 0.05099 0.93929 1.07772 0.61766 0.28579 0.17383
4 0.04758 0.75778 0.86040 0.16683 0.10384 0.07122
5 0.04612 1.05351 1.24823 0.78723 0.44974 0.08484
6 0.04763 0.76821 0.77711 0.72990 0.19847 0.03900
Generosity_2015 Dist_res_2015 Rank_2016 Score_2016 LCI_2016 UCI_2016
1 0.36510 1.95210 154 3.360 3.288 3.432
2 0.14272 1.89894 109 4.655 4.546 4.764
3 0.07822 2.43209 38 6.355 6.227 6.483
4 0.12344 1.94939 141 3.866 3.753 3.979
5 0.11451 2.83600 26 6.650 6.560 6.740
6 0.07855 1.75873 121 4.360 4.266 4.454
GDP_2016 Family_2016 Health_2016 Freedom_2016 Gov_2016 Generosity_2016
1 0.38227 0.11037 0.17344 0.16430 0.07112 0.31268
2 0.95530 0.50163 0.73007 0.31866 0.05301 0.16840
3 1.05266 0.83309 0.61804 0.21006 0.16157 0.07044
4 0.84731 0.66366 0.04991 0.00589 0.08434 0.12071
5 1.15137 1.06612 0.69711 0.42284 0.07296 0.10989
6 0.86086 0.62477 0.64083 0.14037 0.03616 0.07793
Dist_res_2016 Rank_2017 Score_2017 Whisker.high_2016 Whisker.low_2017
1 2.14558 141 3.794 3.873661 3.714338
2 1.92816 109 4.644 4.752464 4.535536
3 3.40904 53 5.872 5.978286 5.765714
4 2.09459 140 3.795 3.951642 3.638358
5 3.12985 24 6.599 6.690085 6.507915
6 1.97864 121 4.376 4.466735 4.285265
GDP_2017 Family_2017 Health_2017 Freedom_2017 Generosity_2017
1 0.4014772 0.5815433 0.18074678 0.1061795 0.31187093
2 0.9961928 0.8036852 0.73115975 0.3814986 0.20131294
3 1.0918645 1.1462175 0.61758465 0.2333358 0.06943665
4 0.8584282 1.1044120 0.04986867 0.0000000 0.09792649
5 1.1852955 1.4404511 0.69513708 0.4945192 0.10945706
6 0.9005967 1.0074837 0.63752443 0.1983033 0.08348809
Gov_2017 Dist_res_2017
1 0.06115783 2.150801
2 0.03986422 1.490442
3 0.14609611 2.567604
4 0.06972034 1.614482
5 0.05973989 2.614005
6 0.02667442 1.521499
# Align country names with the region names used by the "world" map data so
# that the join on Country finds them.
# Fixed: the two Congo entries were swapped. Brazzaville is the capital of the
# Republic of Congo and Kinshasa is the capital of the Democratic Republic of
# the Congo.
# car::recode is called explicitly because dplyr also exports a recode() with a
# different interface.
happy$Country <- car::recode(
  happy$Country,
  "'Congo (Brazzaville)' = 'Republic of Congo';
   'Congo (Kinshasa)' = 'Democratic Republic of the Congo';
   'United States' = 'USA';
   'United Kingdom' = 'UK'"
)

# Report the storage class of the key column (first column of the merged table).
class(happy[, 1])
[1] "factor"
# Store the key column (Country) as plain character strings; this is a no-op
# when the column is already character.
happy[, 1] <- as.character(happy[, 1])
# World map polygons from ggplot2::map_data().
w <- map_data("world")

# Rename the country-name column so it can serve as the join key. The column is
# selected by name ("region") instead of by a hard-coded position, so the
# rename cannot silently hit the wrong column if the layout ever differs.
names(w)[names(w) == "region"] <- "Country"

# Keep only the polygons of countries that have happiness data.
myw <- inner_join(w, happy, by = "Country")
# World map coloured by the 2017 happiness score. Polygon layers are drawn in
# this order:
#   1. every country in gray (background for countries without data),
#   2. countries with data, filled by Score_2017 (x, y and group are inherited
#      from the plot-level mapping),
#   3. black outlines on top of everything.
worldplot <- ggplot(w, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "gray", color = "black") +
  geom_polygon(data = myw, mapping = aes(fill = Score_2017), color = "white") +
  geom_polygon(fill = NA, color = "black") +
  coord_fixed(ratio = 1) +
  scale_fill_distiller(palette = "Spectral") +
  ggtitle("Happiness Score in 2017 in the World") +
  theme_bw()

worldplot
# Relative change of the happiness score between two years, in percent of the
# earlier year's score.
happy$var_2016_2015 <- with(happy, 100 * (Score_2016 - Score_2015) / Score_2015)
happy$var_2017_2016 <- with(happy, 100 * (Score_2017 - Score_2016) / Score_2016)
happy$var_2017_2015 <- with(happy, 100 * (Score_2017 - Score_2015) / Score_2015)
# Score variation 2015 -> 2017 per region, one point per country, coloured by
# the 2017 score. The extra "text" aesthetic is not drawn by ggplot2; it is
# there so the interactive plotly version can show the country name on hover.
pv <- ggplot(
  happy,
  aes(x = var_2017_2015, y = Region, text = paste("country:", Country))
) +
  geom_point(aes(color = Score_2017), size = 3, alpha = 0.6) +
  scale_colour_distiller(palette = "Spectral") +
  xlab("Happiness variation between 2015 and 2017 [%]") +
  theme_bw()
# Alternative colour scheme that was tried; Spectral was preferred in the end:
# scale_colour_gradientn(colours = rainbow(3))

# Convert the static plot into an interactive plotly widget and display it.
pv <- ggplotly(pv)
pv
# Additional per-country indicators; the first column holds the country name
# and is renamed so it matches the join key of the happiness table.
kaggle <- read.csv(file = "Kaggle.csv")
names(kaggle)[1] <- "Country"

# Keep only the countries present in both tables.
merged <- merge(happy, kaggle, by = "Country")
# Homicide rate against the 2015 happiness score, points coloured by region,
# with a smoothed trend line drawn over the points.
ggplot(merged, aes(x = Score_2015, y = Homicide.rate.per.100k.people.2008.2012)) +
  geom_point(aes(color = Region), size = 3, alpha = 0.6) +
  geom_smooth() +
  labs(
    x = "Happiness Score in 2015",
    y = "Homicide Rate per 100k people 2008-2012"
  ) +
  theme_bw()
# Female suicide rate against the 2015 happiness score (all regions).
srf <- ggplot(merged, aes(x = Score_2015, y = Female.Suicide.Rate.100k.people)) +
  geom_point(aes(color = Region), size = 3, alpha = 0.6) +
  geom_smooth() +
  labs(
    x = "Happiness Score in 2015",
    y = "Female Suicide Rate per 100k people"
  ) +
  theme_bw()

# Male suicide rate against the 2015 happiness score (all regions).
srm <- ggplot(merged, aes(x = Score_2015, y = MaleSuicide.Rate.100k.people)) +
  geom_point(aes(color = Region), size = 3, alpha = 0.6) +
  geom_smooth() +
  labs(
    x = "Happiness Score in 2015",
    y = "Male Suicide Rate per 100k people"
  ) +
  theme_bw()

# Show both panels side by side without their own legends, followed by a single
# shared legend taken from the male plot.
grid.arrange(
  srf + theme(legend.position = "none"),
  srm + theme(legend.position = "none"),
  get_legend(srm),
  ncol = 3
)
# Restrict the analysis to Western European countries.
europe <- merged[merged$Region == "Western Europe", ]

# Female suicide rate against happiness, labelled by country and coloured by
# 2015 GDP. Layer order (trend line, labels, points) is kept so the points are
# drawn on top.
srfe <- ggplot(europe, aes(x = Score_2015, y = Female.Suicide.Rate.100k.people)) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  geom_point(aes(color = GDP_2015), size = 4, alpha = 0.6) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Happiness Score in 2015",
    y = "Female Suicide Rate per 100k people"
  ) +
  theme_bw()

# Same plot for the male suicide rate.
srme <- ggplot(europe, aes(x = Score_2015, y = MaleSuicide.Rate.100k.people)) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  geom_point(aes(color = GDP_2015), size = 4, alpha = 0.6) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Happiness Score in 2015",
    y = "Male Suicide Rate per 100k people"
  ) +
  theme_bw()

# Two panels plus a narrow third column holding the shared legend.
grid.arrange(
  srfe + theme(legend.position = "none"),
  srme + theme(legend.position = "none"),
  get_legend(srme),
  ncol = 3,
  widths = c(3, 3, 1)
)
# Country-level land temperatures; keep only the January 2013 readings.
weather_all <- read.csv("GlobalLandTemperaturesByCountry.csv", fileEncoding = "UTF-8")
weather_2013_01 <- weather_all[weather_all$dt == "2013-01-01", ]

# Use the "Denmark (Europe)" reading for the "Denmark" entry.
# Fixed: the original assigned the whole row, which also overwrote the Country
# value, so the "Denmark" row was relabelled "Denmark (Europe)" and Denmark
# then dropped out of the merge below. Only the non-key columns are copied now.
# which() is used so NA country names cannot end up in the row subscripts.
denmark_row <- which(weather_2013_01$Country == "Denmark")
denmark_europe_row <- which(weather_2013_01$Country == "Denmark (Europe)")
if (length(denmark_row) == 1L && length(denmark_europe_row) == 1L) {
  value_cols <- setdiff(names(weather_2013_01), "Country")
  weather_2013_01[denmark_row, value_cols] <-
    weather_2013_01[denmark_europe_row, value_cols]
} else {
  warning(
    "Expected exactly one 'Denmark' and one 'Denmark (Europe)' row; ",
    "Denmark temperature left unchanged.",
    call. = FALSE
  )
}

# Attach the temperatures to the combined table (countries without a
# temperature row are dropped) and refresh the Western Europe subset so it
# carries the new columns.
merged <- merge(merged, weather_2013_01, by = "Country")
europe <- merged[merged$Region == "Western Europe", ]
# January 2013 average temperature against the 2015 happiness score for
# Western Europe; points coloured by 2015 GDP and labelled by country.
happy_temp <- ggplot(europe, aes(x = Score_2015, y = AverageTemperature)) +
  geom_point(aes(color = GDP_2015), size = 4, alpha = 0.6) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Happiness Score in 2015",
    y = "Average Temperature Jan 2013"
  ) +
  theme_bw()

happy_temp
# Male suicide rate against the January 2013 average temperature for Western
# Europe; points coloured by the 2015 happiness score, labelled by country.
pm <- ggplot(europe, aes(x = AverageTemperature, y = MaleSuicide.Rate.100k.people)) +
  geom_point(aes(color = Score_2015), size = 4, alpha = 0.6) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Average Temperature Jan 2013",
    y = "Male Suicide Rate per 100k people"
  ) +
  theme_bw()

# Same plot for the female suicide rate.
pf <- ggplot(europe, aes(x = AverageTemperature, y = Female.Suicide.Rate.100k.people)) +
  geom_point(aes(color = Score_2015), size = 4, alpha = 0.6) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Average Temperature Jan 2013",
    y = "Female Suicide Rate per 100k people"
  ) +
  theme_bw()

# Female and male panels side by side, with one shared legend in a narrow
# third column.
grid.arrange(
  pf + theme(legend.position = "none"),
  pm + theme(legend.position = "none"),
  get_legend(pf),
  ncol = 3,
  widths = c(3, 3, 1)
)
#### Finally, the plots show that the colder the country, the higher its suicide rate tends to be